{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "collapsed_sections": [
        "xGQYpfRE21wH",
        "Rrs01ARK30sv",
        "C57rcWw95HmU",
        "nJPhbi4Xy2oR",
        "Yy0TGHMqiTjE",
        "TzoyvzvXz1UU",
        "z_flHdb_6xDw",
        "1d80SXBL7HHv",
        "O6ec5yIr8Z01",
        "ulz0NxLa_1hd",
        "1KVcVFrLyfpy",
        "4ef6XxnT6w2F"
      ],
      "toc_visible": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "##Mount Google Drive and load libraries"
      ],
      "metadata": {
        "id": "xGQYpfRE21wH"
      }
    },
    {
      "cell_type": "code",
      "execution_count": 60,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "ZXbTFdLQ2VJ2",
        "outputId": "b339ba02-a864-4a66-8898-0d6bf0432058"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
          ]
        }
      ],
      "source": [
        "from google.colab import drive\n",
        "drive.mount(\"/content/drive\")"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import pandas as pd"
      ],
      "metadata": {
        "id": "Z6UeQkhH3rXL"
      },
      "execution_count": 61,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n"
      ],
      "metadata": {
        "id": "lwTfeUBq3iKe"
      },
      "execution_count": 62,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [],
      "metadata": {
        "id": "TM4vo6aJzZ_N"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "#Load and clean 2019 data"
      ],
      "metadata": {
        "id": "PnNA84U0wGig"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "##Read csv file"
      ],
      "metadata": {
        "id": "Rrs01ARK30sv"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "path = '/content/drive/My Drive/iSchool/Data Services/Datasets/OregonInternet/'\n",
        "file = 'nov19-cps.csv'\n",
        "df19 = pd.read_csv(path+file, encoding='UTF-8')"
      ],
      "metadata": {
        "id": "Pwh-05b1382k"
      },
      "execution_count": 91,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "###View preview of data"
      ],
      "metadata": {
        "id": "C57rcWw95HmU"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "df19.shape"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "8PkqcF7O5ON4",
        "outputId": "df8b928b-1d3e-4606-9b26-958af9e293d6"
      },
      "execution_count": 92,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(138850, 989)"
            ]
          },
          "metadata": {},
          "execution_count": 92
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "df19.head()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 299
        },
        "id": "fYPih63J5S3F",
        "outputId": "8406a965-bdd4-48a3-f367-de91edd58d54"
      },
      "execution_count": 93,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "            hrhhid  hrmonth  hryear4  hurespli  hufinal  hetenure  hehousut  \\\n",
              "0     110157414767       11     2019         1      201         1         1   \n",
              "1    4005751110574       11     2019         1      201         1         1   \n",
              "2     110824593052       11     2019        -1      226        -1         1   \n",
              "3  147240092351000       11     2019        -1      226        -1         1   \n",
              "4  901002967110858       11     2019         1      201         1         1   \n",
              "\n",
              "   hetelhhd  hetelavl  hephoneo  ...   rewgt151   rewgt152   rewgt153  \\\n",
              "0         1        -1         1  ...  1082.2341  3694.0034  3672.5452   \n",
              "1         1        -1         1  ...  2763.0639  2740.3994  4684.8952   \n",
              "2        -1        -1         0  ...        NaN        NaN        NaN   \n",
              "3        -1        -1         0  ...        NaN        NaN        NaN   \n",
              "4         1        -1         1  ...  1937.6707  5593.3860  1928.8593   \n",
              "\n",
              "    rewgt154   rewgt155   rewgt156   rewgt157   rewgt158   rewgt159   rewgt160  \n",
              "0  3670.7370  3680.9228  1086.5708  3671.6112  3675.4022  6209.6508  6228.6944  \n",
              "1  2717.3707  2798.3753   799.2641  4587.7197   786.6383   784.2127  2774.1131  \n",
              "2        NaN        NaN        NaN        NaN        NaN        NaN        NaN  \n",
              "3        NaN        NaN        NaN        NaN        NaN        NaN        NaN  \n",
              "4  5379.6947  1977.4803  5507.7528  5505.6602  5451.0764  5423.9146  1949.2174  \n",
              "\n",
              "[5 rows x 989 columns]"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-92e89fc1-0eb3-4a2a-879c-7cf114c6b048\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>hrhhid</th>\n",
              "      <th>hrmonth</th>\n",
              "      <th>hryear4</th>\n",
              "      <th>hurespli</th>\n",
              "      <th>hufinal</th>\n",
              "      <th>hetenure</th>\n",
              "      <th>hehousut</th>\n",
              "      <th>hetelhhd</th>\n",
              "      <th>hetelavl</th>\n",
              "      <th>hephoneo</th>\n",
              "      <th>...</th>\n",
              "      <th>rewgt151</th>\n",
              "      <th>rewgt152</th>\n",
              "      <th>rewgt153</th>\n",
              "      <th>rewgt154</th>\n",
              "      <th>rewgt155</th>\n",
              "      <th>rewgt156</th>\n",
              "      <th>rewgt157</th>\n",
              "      <th>rewgt158</th>\n",
              "      <th>rewgt159</th>\n",
              "      <th>rewgt160</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>110157414767</td>\n",
              "      <td>11</td>\n",
              "      <td>2019</td>\n",
              "      <td>1</td>\n",
              "      <td>201</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "      <td>1</td>\n",
              "      <td>...</td>\n",
              "      <td>1082.2341</td>\n",
              "      <td>3694.0034</td>\n",
              "      <td>3672.5452</td>\n",
              "      <td>3670.7370</td>\n",
              "      <td>3680.9228</td>\n",
              "      <td>1086.5708</td>\n",
              "      <td>3671.6112</td>\n",
              "      <td>3675.4022</td>\n",
              "      <td>6209.6508</td>\n",
              "      <td>6228.6944</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>4005751110574</td>\n",
              "      <td>11</td>\n",
              "      <td>2019</td>\n",
              "      <td>1</td>\n",
              "      <td>201</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "      <td>1</td>\n",
              "      <td>...</td>\n",
              "      <td>2763.0639</td>\n",
              "      <td>2740.3994</td>\n",
              "      <td>4684.8952</td>\n",
              "      <td>2717.3707</td>\n",
              "      <td>2798.3753</td>\n",
              "      <td>799.2641</td>\n",
              "      <td>4587.7197</td>\n",
              "      <td>786.6383</td>\n",
              "      <td>784.2127</td>\n",
              "      <td>2774.1131</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>110824593052</td>\n",
              "      <td>11</td>\n",
              "      <td>2019</td>\n",
              "      <td>-1</td>\n",
              "      <td>226</td>\n",
              "      <td>-1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>0</td>\n",
              "      <td>...</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>147240092351000</td>\n",
              "      <td>11</td>\n",
              "      <td>2019</td>\n",
              "      <td>-1</td>\n",
              "      <td>226</td>\n",
              "      <td>-1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>0</td>\n",
              "      <td>...</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>901002967110858</td>\n",
              "      <td>11</td>\n",
              "      <td>2019</td>\n",
              "      <td>1</td>\n",
              "      <td>201</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "      <td>1</td>\n",
              "      <td>...</td>\n",
              "      <td>1937.6707</td>\n",
              "      <td>5593.3860</td>\n",
              "      <td>1928.8593</td>\n",
              "      <td>5379.6947</td>\n",
              "      <td>1977.4803</td>\n",
              "      <td>5507.7528</td>\n",
              "      <td>5505.6602</td>\n",
              "      <td>5451.0764</td>\n",
              "      <td>5423.9146</td>\n",
              "      <td>1949.2174</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>5 rows × 989 columns</p>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-92e89fc1-0eb3-4a2a-879c-7cf114c6b048')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-92e89fc1-0eb3-4a2a-879c-7cf114c6b048 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-92e89fc1-0eb3-4a2a-879c-7cf114c6b048');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 93
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "df19.tail()"
      ],
      "metadata": {
        "id": "9LDQxotQ5ULZ",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 299
        },
        "outputId": "792276d5-d06e-4708-c125-52dca4f51dfe"
      },
      "execution_count": 94,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "                 hrhhid  hrmonth  hryear4  hurespli  hufinal  hetenure  \\\n",
              "138845   10005815764346       11     2019        -1      218         1   \n",
              "138846   50610634581470       11     2019        -1      218        -1   \n",
              "138847  965683190700009       11     2019        -1      218        -1   \n",
              "138848   75016847506821       11     2019         1      218         2   \n",
              "138849  211868005750476       11     2019        -1      218         2   \n",
              "\n",
              "        hehousut  hetelhhd  hetelavl  hephoneo  ...  rewgt151  rewgt152  \\\n",
              "138845         1         1        -1         1  ...       NaN       NaN   \n",
              "138846         1        -1        -1         0  ...       NaN       NaN   \n",
              "138847         1        -1        -1         0  ...       NaN       NaN   \n",
              "138848         1         1        -1         1  ...       NaN       NaN   \n",
              "138849         1         2         2         0  ...       NaN       NaN   \n",
              "\n",
              "        rewgt153  rewgt154  rewgt155  rewgt156  rewgt157  rewgt158  rewgt159  \\\n",
              "138845       NaN       NaN       NaN       NaN       NaN       NaN       NaN   \n",
              "138846       NaN       NaN       NaN       NaN       NaN       NaN       NaN   \n",
              "138847       NaN       NaN       NaN       NaN       NaN       NaN       NaN   \n",
              "138848       NaN       NaN       NaN       NaN       NaN       NaN       NaN   \n",
              "138849       NaN       NaN       NaN       NaN       NaN       NaN       NaN   \n",
              "\n",
              "        rewgt160  \n",
              "138845       NaN  \n",
              "138846       NaN  \n",
              "138847       NaN  \n",
              "138848       NaN  \n",
              "138849       NaN  \n",
              "\n",
              "[5 rows x 989 columns]"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-0f53ac29-e6d5-4c23-9e2c-ef0b472efcf4\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>hrhhid</th>\n",
              "      <th>hrmonth</th>\n",
              "      <th>hryear4</th>\n",
              "      <th>hurespli</th>\n",
              "      <th>hufinal</th>\n",
              "      <th>hetenure</th>\n",
              "      <th>hehousut</th>\n",
              "      <th>hetelhhd</th>\n",
              "      <th>hetelavl</th>\n",
              "      <th>hephoneo</th>\n",
              "      <th>...</th>\n",
              "      <th>rewgt151</th>\n",
              "      <th>rewgt152</th>\n",
              "      <th>rewgt153</th>\n",
              "      <th>rewgt154</th>\n",
              "      <th>rewgt155</th>\n",
              "      <th>rewgt156</th>\n",
              "      <th>rewgt157</th>\n",
              "      <th>rewgt158</th>\n",
              "      <th>rewgt159</th>\n",
              "      <th>rewgt160</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>138845</th>\n",
              "      <td>10005815764346</td>\n",
              "      <td>11</td>\n",
              "      <td>2019</td>\n",
              "      <td>-1</td>\n",
              "      <td>218</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "      <td>1</td>\n",
              "      <td>...</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>138846</th>\n",
              "      <td>50610634581470</td>\n",
              "      <td>11</td>\n",
              "      <td>2019</td>\n",
              "      <td>-1</td>\n",
              "      <td>218</td>\n",
              "      <td>-1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>0</td>\n",
              "      <td>...</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>138847</th>\n",
              "      <td>965683190700009</td>\n",
              "      <td>11</td>\n",
              "      <td>2019</td>\n",
              "      <td>-1</td>\n",
              "      <td>218</td>\n",
              "      <td>-1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>0</td>\n",
              "      <td>...</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>138848</th>\n",
              "      <td>75016847506821</td>\n",
              "      <td>11</td>\n",
              "      <td>2019</td>\n",
              "      <td>1</td>\n",
              "      <td>218</td>\n",
              "      <td>2</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "      <td>1</td>\n",
              "      <td>...</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>138849</th>\n",
              "      <td>211868005750476</td>\n",
              "      <td>11</td>\n",
              "      <td>2019</td>\n",
              "      <td>-1</td>\n",
              "      <td>218</td>\n",
              "      <td>2</td>\n",
              "      <td>1</td>\n",
              "      <td>2</td>\n",
              "      <td>2</td>\n",
              "      <td>0</td>\n",
              "      <td>...</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>5 rows × 989 columns</p>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-0f53ac29-e6d5-4c23-9e2c-ef0b472efcf4')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-0f53ac29-e6d5-4c23-9e2c-ef0b472efcf4 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-0f53ac29-e6d5-4c23-9e2c-ef0b472efcf4');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 94
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "##Extract Oregon data"
      ],
      "metadata": {
        "id": "nJPhbi4Xy2oR"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "df19 = df19.loc[df19['gestfips'] == 41]"
      ],
      "metadata": {
        "id": "f9kTNT-QzBR2"
      },
      "execution_count": 95,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "df19.shape"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "RmJqfs3cz_k4",
        "outputId": "9b9852d5-1864-455a-c186-13189d5e9661"
      },
      "execution_count": 96,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(2050, 989)"
            ]
          },
          "metadata": {},
          "execution_count": 96
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "df19.head()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 299
        },
        "id": "p_oPtJPD2QpW",
        "outputId": "9daeb9bf-af70-4114-82af-5d3757806829"
      },
      "execution_count": 97,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "               hrhhid  hrmonth  hryear4  hurespli  hufinal  hetenure  \\\n",
              "8701  202409343110505       11     2019         1        1         2   \n",
              "8702  202409343110505       11     2019         1        1         2   \n",
              "8703  401760212077126       11     2019         1        1         1   \n",
              "8704  401760212077126       11     2019         1        1         1   \n",
              "8705  401760212077126       11     2019         1        1         1   \n",
              "\n",
              "      hehousut  hetelhhd  hetelavl  hephoneo  ...   rewgt151   rewgt152  \\\n",
              "8701         1         1        -1         1  ...  5725.4782  5856.2197   \n",
              "8702         1         1        -1         1  ...        NaN        NaN   \n",
              "8703         1         1         2         1  ...        NaN        NaN   \n",
              "8704         1         1         2         1  ...  7959.9438  1382.5205   \n",
              "8705         1         1         2         1  ...        NaN        NaN   \n",
              "\n",
              "       rewgt153   rewgt154   rewgt155   rewgt156   rewgt157   rewgt158  \\\n",
              "8701  2186.2943  2208.3493  5848.3867  2230.0361  2219.5544  5830.8312   \n",
              "8702        NaN        NaN        NaN        NaN        NaN        NaN   \n",
              "8703        NaN        NaN        NaN        NaN        NaN        NaN   \n",
              "8704  1322.3236  4726.3142  4695.0762  8070.8910  4633.8925  4675.6381   \n",
              "8705        NaN        NaN        NaN        NaN        NaN        NaN   \n",
              "\n",
              "       rewgt159   rewgt160  \n",
              "8701  5720.8921  5910.6827  \n",
              "8702        NaN        NaN  \n",
              "8703        NaN        NaN  \n",
              "8704  4573.5357  4593.0305  \n",
              "8705        NaN        NaN  \n",
              "\n",
              "[5 rows x 989 columns]"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-a0b3671a-be89-4f55-a76d-bf1226327b9a\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>hrhhid</th>\n",
              "      <th>hrmonth</th>\n",
              "      <th>hryear4</th>\n",
              "      <th>hurespli</th>\n",
              "      <th>hufinal</th>\n",
              "      <th>hetenure</th>\n",
              "      <th>hehousut</th>\n",
              "      <th>hetelhhd</th>\n",
              "      <th>hetelavl</th>\n",
              "      <th>hephoneo</th>\n",
              "      <th>...</th>\n",
              "      <th>rewgt151</th>\n",
              "      <th>rewgt152</th>\n",
              "      <th>rewgt153</th>\n",
              "      <th>rewgt154</th>\n",
              "      <th>rewgt155</th>\n",
              "      <th>rewgt156</th>\n",
              "      <th>rewgt157</th>\n",
              "      <th>rewgt158</th>\n",
              "      <th>rewgt159</th>\n",
              "      <th>rewgt160</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>8701</th>\n",
              "      <td>202409343110505</td>\n",
              "      <td>11</td>\n",
              "      <td>2019</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>2</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "      <td>1</td>\n",
              "      <td>...</td>\n",
              "      <td>5725.4782</td>\n",
              "      <td>5856.2197</td>\n",
              "      <td>2186.2943</td>\n",
              "      <td>2208.3493</td>\n",
              "      <td>5848.3867</td>\n",
              "      <td>2230.0361</td>\n",
              "      <td>2219.5544</td>\n",
              "      <td>5830.8312</td>\n",
              "      <td>5720.8921</td>\n",
              "      <td>5910.6827</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8702</th>\n",
              "      <td>202409343110505</td>\n",
              "      <td>11</td>\n",
              "      <td>2019</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>2</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "      <td>1</td>\n",
              "      <td>...</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8703</th>\n",
              "      <td>401760212077126</td>\n",
              "      <td>11</td>\n",
              "      <td>2019</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>2</td>\n",
              "      <td>1</td>\n",
              "      <td>...</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8704</th>\n",
              "      <td>401760212077126</td>\n",
              "      <td>11</td>\n",
              "      <td>2019</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>2</td>\n",
              "      <td>1</td>\n",
              "      <td>...</td>\n",
              "      <td>7959.9438</td>\n",
              "      <td>1382.5205</td>\n",
              "      <td>1322.3236</td>\n",
              "      <td>4726.3142</td>\n",
              "      <td>4695.0762</td>\n",
              "      <td>8070.8910</td>\n",
              "      <td>4633.8925</td>\n",
              "      <td>4675.6381</td>\n",
              "      <td>4573.5357</td>\n",
              "      <td>4593.0305</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8705</th>\n",
              "      <td>401760212077126</td>\n",
              "      <td>11</td>\n",
              "      <td>2019</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>2</td>\n",
              "      <td>1</td>\n",
              "      <td>...</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>5 rows × 989 columns</p>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-a0b3671a-be89-4f55-a76d-bf1226327b9a')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-a0b3671a-be89-4f55-a76d-bf1226327b9a button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-a0b3671a-be89-4f55-a76d-bf1226327b9a');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 97
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "##Drop extraneous variables"
      ],
      "metadata": {
        "id": "Yy0TGHMqiTjE"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "df19.drop(df19.columns.difference(['hryear4', 'heinhome', 'hehomte1', 'heprinoh', 'hepsensi']), axis = 1, inplace = True)"
      ],
      "metadata": {
        "id": "cfM5l-Qd0T2o"
      },
      "execution_count": 99,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "df19.shape"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "LfYvJ1Dg0i1D",
        "outputId": "57138fc5-ca0f-4e59-d0e9-5651b01dcf63"
      },
      "execution_count": 110,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(2050, 5)"
            ]
          },
          "metadata": {},
          "execution_count": 110
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "df19.head()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "id": "U0UzCsqQ1Ou-",
        "outputId": "4082bcac-1c1c-4b19-b8b2-8aabf455eecc"
      },
      "execution_count": 120,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "      hryear4  heinhome  hehomte1  heprinoh  hepsensi\n",
              "8701     2019         1         1        -1        -1\n",
              "8702     2019         1         1        -1        -1\n",
              "8703     2019         1         1        -1        -1\n",
              "8704     2019         1         1        -1        -1\n",
              "8705     2019        -1        -1        -1        -1"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-55b864b7-7a54-48b3-b221-dcd4a5605be9\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>hryear4</th>\n",
              "      <th>heinhome</th>\n",
              "      <th>hehomte1</th>\n",
              "      <th>heprinoh</th>\n",
              "      <th>hepsensi</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>8701</th>\n",
              "      <td>2019</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8702</th>\n",
              "      <td>2019</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8703</th>\n",
              "      <td>2019</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8704</th>\n",
              "      <td>2019</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8705</th>\n",
              "      <td>2019</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-55b864b7-7a54-48b3-b221-dcd4a5605be9')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-55b864b7-7a54-48b3-b221-dcd4a5605be9 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-55b864b7-7a54-48b3-b221-dcd4a5605be9');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 120
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "#Load and clean 2021 data"
      ],
      "metadata": {
        "id": "qO4kyzY3545g"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "##Read and preview file"
      ],
      "metadata": {
        "id": "TzoyvzvXz1UU"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "path = '/content/drive/My Drive/iSchool/Data Services/Datasets/OregonInternet/'\n",
        "file = 'nov21pub.csv'\n",
        "df21 = pd.read_csv(path+file, encoding='UTF-8')"
      ],
      "metadata": {
        "id": "LF4aFpT56IOE"
      },
      "execution_count": 127,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "df21.shape"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "2uYfbRdD6XD-",
        "outputId": "03ac0305-9924-48a6-c8f6-a67302a326cc"
      },
      "execution_count": 128,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(127375, 568)"
            ]
          },
          "metadata": {},
          "execution_count": 128
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "df21.head()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 299
        },
        "id": "Exm3v8WU6XUd",
        "outputId": "c566969f-3ea1-45b2-fb19-2b76b164e834"
      },
      "execution_count": 129,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "            HRHHID  HRMONTH  HRYEAR4  HURESPLI  HUFINAL  FILLER  HETENURE  \\\n",
              "0  610905110108708       11     2021         1      201     NaN         1   \n",
              "1  610905110108708       11     2021         1      201     NaN         1   \n",
              "2  610905110108708       11     2021         1      201     NaN         1   \n",
              "3  610905110108708       11     2021         1      201     NaN         1   \n",
              "4  610905110108708       11     2021         1      201     NaN         1   \n",
              "\n",
              "   HEHOUSUT  HETELHHD  HETELAVL  ...  PXEDTRAI  PXEGOVTS  PXUSESVC  PXESRVCS  \\\n",
              "0         1         1        -1  ...         0         0         0         0   \n",
              "1         1         1        -1  ...        -1        -1        -1        -1   \n",
              "2         1         1        -1  ...        -1        -1        -1        -1   \n",
              "3         1         1        -1  ...        -1        -1        -1        -1   \n",
              "4         1         1        -1  ...        -1        -1        -1        -1   \n",
              "\n",
              "   PXECOMME  PXEGOODS  PXFINANC  PXVOICEA  PXHOMIOT  PWPRMWGT  \n",
              "0         0         0         0         0         0  99445490  \n",
              "1        -1        -1        -1        -1        -1         0  \n",
              "2        -1        -1        -1        -1        -1         0  \n",
              "3        -1        -1        -1        -1        -1         0  \n",
              "4        -1        -1        -1        -1        -1         0  \n",
              "\n",
              "[5 rows x 568 columns]"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-3484b21c-2a43-40d8-b0fe-51ec36a7c80e\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>HRHHID</th>\n",
              "      <th>HRMONTH</th>\n",
              "      <th>HRYEAR4</th>\n",
              "      <th>HURESPLI</th>\n",
              "      <th>HUFINAL</th>\n",
              "      <th>FILLER</th>\n",
              "      <th>HETENURE</th>\n",
              "      <th>HEHOUSUT</th>\n",
              "      <th>HETELHHD</th>\n",
              "      <th>HETELAVL</th>\n",
              "      <th>...</th>\n",
              "      <th>PXEDTRAI</th>\n",
              "      <th>PXEGOVTS</th>\n",
              "      <th>PXUSESVC</th>\n",
              "      <th>PXESRVCS</th>\n",
              "      <th>PXECOMME</th>\n",
              "      <th>PXEGOODS</th>\n",
              "      <th>PXFINANC</th>\n",
              "      <th>PXVOICEA</th>\n",
              "      <th>PXHOMIOT</th>\n",
              "      <th>PWPRMWGT</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>610905110108708</td>\n",
              "      <td>11</td>\n",
              "      <td>2021</td>\n",
              "      <td>1</td>\n",
              "      <td>201</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "      <td>...</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>99445490</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>610905110108708</td>\n",
              "      <td>11</td>\n",
              "      <td>2021</td>\n",
              "      <td>1</td>\n",
              "      <td>201</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "      <td>...</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>610905110108708</td>\n",
              "      <td>11</td>\n",
              "      <td>2021</td>\n",
              "      <td>1</td>\n",
              "      <td>201</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "      <td>...</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>610905110108708</td>\n",
              "      <td>11</td>\n",
              "      <td>2021</td>\n",
              "      <td>1</td>\n",
              "      <td>201</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "      <td>...</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>610905110108708</td>\n",
              "      <td>11</td>\n",
              "      <td>2021</td>\n",
              "      <td>1</td>\n",
              "      <td>201</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "      <td>...</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>-1</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>5 rows × 568 columns</p>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-3484b21c-2a43-40d8-b0fe-51ec36a7c80e')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-3484b21c-2a43-40d8-b0fe-51ec36a7c80e button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-3484b21c-2a43-40d8-b0fe-51ec36a7c80e');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 129
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "##Extract Oregon data"
      ],
      "metadata": {
        "id": "z_flHdb_6xDw"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "df21 = df21.loc[df21['GESTFIPS'] == 41]"
      ],
      "metadata": {
        "id": "McJPyJBW6x5d"
      },
      "execution_count": 130,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "df21.shape"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Z5sJYw_G7A1p",
        "outputId": "dcd59e98-63dc-4122-f75a-6734ea9c70d4"
      },
      "execution_count": 131,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(1939, 568)"
            ]
          },
          "metadata": {},
          "execution_count": 131
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "##Drop extraneous variables"
      ],
      "metadata": {
        "id": "1d80SXBL7HHv"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "df21.drop(df21.columns.difference(['HRYEAR4', 'HEINHOME', 'HEHOMTE1', 'HEPRINOH']), axis = 1, inplace = True)"
      ],
      "metadata": {
        "id": "c-iz21v6zzxE"
      },
      "execution_count": 132,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "df21OR.shape"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "aClgXZ4w0Srn",
        "outputId": "e7005bea-4cec-41c7-c97b-ed3eb55aaa9b"
      },
      "execution_count": 133,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(1939, 4)"
            ]
          },
          "metadata": {},
          "execution_count": 133
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "df21OR.head()\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "id": "jCUbaFvt3thD",
        "outputId": "d4b25651-c9b9-4075-bb9f-69179d1ab0df"
      },
      "execution_count": 134,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "      HRYEAR4  HEINHOME  HEHOMTE1  HEPRINOH\n",
              "8200     2021         1         1        -1\n",
              "8201     2021         1         1        -1\n",
              "8202     2021         1         1        -1\n",
              "8203     2021         1         1        -1\n",
              "8204     2021         1         1        -1"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-3e528998-b983-4ac4-acda-084d7713c688\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>HRYEAR4</th>\n",
              "      <th>HEINHOME</th>\n",
              "      <th>HEHOMTE1</th>\n",
              "      <th>HEPRINOH</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>8200</th>\n",
              "      <td>2021</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8201</th>\n",
              "      <td>2021</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8202</th>\n",
              "      <td>2021</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8203</th>\n",
              "      <td>2021</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8204</th>\n",
              "      <td>2021</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>-1</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-3e528998-b983-4ac4-acda-084d7713c688')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-3e528998-b983-4ac4-acda-084d7713c688 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-3e528998-b983-4ac4-acda-084d7713c688');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 134
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "df21.head()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "id": "sai028e2uy7_",
        "outputId": "50694cf8-84fc-449f-b85e-fdd354656b7d"
      },
      "execution_count": 136,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "      HRYEAR4 HEINHOME HEHOMTE1 HEPRINOH\n",
              "8200     2021        1        1         \n",
              "8201     2021        1        1         \n",
              "8202     2021        1        1         \n",
              "8203     2021        1        1         \n",
              "8204     2021        1        1         "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-b326c9a8-60c2-435c-9f9d-eb47c0396a57\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>HRYEAR4</th>\n",
              "      <th>HEINHOME</th>\n",
              "      <th>HEHOMTE1</th>\n",
              "      <th>HEPRINOH</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>8200</th>\n",
              "      <td>2021</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td></td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8201</th>\n",
              "      <td>2021</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td></td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8202</th>\n",
              "      <td>2021</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td></td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8203</th>\n",
              "      <td>2021</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td></td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8204</th>\n",
              "      <td>2021</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td></td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-b326c9a8-60c2-435c-9f9d-eb47c0396a57')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-b326c9a8-60c2-435c-9f9d-eb47c0396a57 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-b326c9a8-60c2-435c-9f9d-eb47c0396a57');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 136
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "##Rename columns to match 2019 variable names"
      ],
      "metadata": {
        "id": "O6ec5yIr8Z01"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "df21.replace(to_replace = {'HRYEAR4': 'hryear4', 'HEINHOME': 'heinhome', 'HEHOMTE1' : 'hehomte1', 'HEPRINOH' : 'heprinoh'}, inplace=True) "
      ],
      "metadata": {
        "id": "8SmrBYIM8boy"
      },
      "execution_count": 152,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "df21.head()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "id": "5boFBe-Zww-w",
        "outputId": "7fd42ce1-facf-4afe-fe1c-a7234ac683d9"
      },
      "execution_count": 153,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "   hryear4  heinhome  hehomte1  heprinoh\n",
              "0     2021       1.0       1.0       NaN\n",
              "1     2021       1.0       1.0       NaN\n",
              "2     2021       1.0       1.0       NaN\n",
              "3     2021       1.0       1.0       NaN\n",
              "4     2021       1.0       1.0       NaN"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-0e506aea-4505-49b5-92e7-3094e82b9889\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>hryear4</th>\n",
              "      <th>heinhome</th>\n",
              "      <th>hehomte1</th>\n",
              "      <th>heprinoh</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>2021</td>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>2021</td>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>2021</td>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>2021</td>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>2021</td>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-0e506aea-4505-49b5-92e7-3094e82b9889')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-0e506aea-4505-49b5-92e7-3094e82b9889 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-0e506aea-4505-49b5-92e7-3094e82b9889');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 153
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "#Combine 2019 and 2021 data"
      ],
      "metadata": {
        "id": "ulz0NxLa_1hd"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "df = pd.concat([df19, df21])"
      ],
      "metadata": {
        "id": "8Ckdv5D663lH"
      },
      "execution_count": 155,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "df.shape"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "aFx8rGwfx606",
        "outputId": "6d548b3a-cb81-48ec-cb9c-e31f7b2a93a0"
      },
      "execution_count": 157,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(3989, 5)"
            ]
          },
          "metadata": {},
          "execution_count": 157
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "df.head()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "id": "YPZavRXMwBYT",
        "outputId": "e0de0dd2-77b4-4041-ee71-f1317de3adb9"
      },
      "execution_count": 156,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "   hryear4  heinhome  hehomte1  heprinoh  hepsensi\n",
              "0     2019       1.0       1.0       NaN       NaN\n",
              "1     2019       1.0       1.0       NaN       NaN\n",
              "2     2019       1.0       1.0       NaN       NaN\n",
              "3     2019       1.0       1.0       NaN       NaN\n",
              "4     2019       NaN       NaN       NaN       NaN"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-ce0ac627-1058-40cd-93c8-f76686c2a1f1\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>hryear4</th>\n",
              "      <th>heinhome</th>\n",
              "      <th>hehomte1</th>\n",
              "      <th>heprinoh</th>\n",
              "      <th>hepsensi</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>2019</td>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>2019</td>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>2019</td>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>2019</td>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>2019</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-ce0ac627-1058-40cd-93c8-f76686c2a1f1')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-ce0ac627-1058-40cd-93c8-f76686c2a1f1 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-ce0ac627-1058-40cd-93c8-f76686c2a1f1');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 156
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "#Remove null values"
      ],
      "metadata": {
        "id": "1KVcVFrLyfpy"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "df.replace(to_replace = -1, value = '', inplace=True)"
      ],
      "metadata": {
        "id": "F_ujvCv1yf2v"
      },
      "execution_count": 160,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "df.head()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "id": "caOC5L9Vy2wA",
        "outputId": "db2e642b-1670-4ec3-8d1f-5698efe5e4c2"
      },
      "execution_count": 161,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "   hryear4  heinhome  hehomte1  heprinoh  hepsensi\n",
              "0     2019       1.0       1.0       NaN       NaN\n",
              "1     2019       1.0       1.0       NaN       NaN\n",
              "2     2019       1.0       1.0       NaN       NaN\n",
              "3     2019       1.0       1.0       NaN       NaN\n",
              "4     2019       NaN       NaN       NaN       NaN"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-ccbfe0d8-3227-46a6-9e59-955feefb2a78\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>hryear4</th>\n",
              "      <th>heinhome</th>\n",
              "      <th>hehomte1</th>\n",
              "      <th>heprinoh</th>\n",
              "      <th>hepsensi</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>2019</td>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>2019</td>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>2019</td>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>2019</td>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>2019</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-ccbfe0d8-3227-46a6-9e59-955feefb2a78')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-ccbfe0d8-3227-46a6-9e59-955feefb2a78 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-ccbfe0d8-3227-46a6-9e59-955feefb2a78');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 161
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "#Export to csv file"
      ],
      "metadata": {
        "id": "4ef6XxnT6w2F"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "df.to_csv('/content/drive/My Drive/iSchool/Data Services/Datasets/OregonInternet/CPS-OR-combined-v1.csv', index=False)\n"
      ],
      "metadata": {
        "id": "W2YF143v5ePz"
      },
      "execution_count": 164,
      "outputs": []
    }
  ]
}